Setup + True SR and True Bat Avg.¶
In [2]:
import os
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import pandas as pd
# === CONFIGURATION ===
IPL_JSON_PATH = "../ipl_json" # path to folder containing IPL JSON files
SEASONS_FILTER = set({"2024"}) # example: {"2020", "2021"}; empty set = include all
SELECTED_PLAYERS = set() # example: {"V Kohli", "MS Dhoni"}; empty set = include all
MIN_BALLS_FACED = 100 # minimum balls faced to include player in output
# === DATA STRUCTURE ===
player_stats = defaultdict(lambda: {
'runs': 0,
'outs': 0,
'balls': 0,
'baseline_runs': 0,
'baseline_outs': 0,
'baseline_balls': 0,
})
def extract_top6(info):
players = info.get("players", {})
top6 = set()
for team in players.values():
top6.update(team[:6])
return top6
def process_match(filepath):
with open(filepath, "r", encoding="utf-8") as f:
match = json.load(f)
season = str(match["info"].get("season"))
if SEASONS_FILTER and season not in SEASONS_FILTER:
return
top6 = extract_top6(match["info"])
match_player_data = defaultdict(lambda: {'runs': 0, 'balls': 0, 'outs': 0})
for inning in match.get("innings", []):
for over in inning.get("overs", []):
for delivery in over.get("deliveries", []):
batter = delivery.get("batter")
runs = delivery.get("runs", {}).get("batter", 0)
match_player_data[batter]['runs'] += runs
match_player_data[batter]['balls'] += 1
if "wickets" in delivery:
for w in delivery["wickets"]:
if w.get("kind") != "run out": # Ignore run-outs if you want
out_player = w.get("player_out")
match_player_data[out_player]['outs'] += 1
# Aggregate per match
baseline_runs = sum(match_player_data[p]['runs'] for p in top6)
baseline_outs = sum(match_player_data[p]['outs'] for p in top6)
baseline_balls = sum(match_player_data[p]['balls'] for p in top6)
for player, data in match_player_data.items():
if SELECTED_PLAYERS and player not in SELECTED_PLAYERS:
continue
player_stats[player]['runs'] += data['runs']
player_stats[player]['balls'] += data['balls']
player_stats[player]['outs'] += data['outs']
player_stats[player]['baseline_runs'] += baseline_runs
player_stats[player]['baseline_outs'] += baseline_outs
player_stats[player]['baseline_balls'] += baseline_balls
# === MAIN PROCESSING ===
all_files = [os.path.join(IPL_JSON_PATH, f) for f in os.listdir(IPL_JSON_PATH) if f.endswith(".json")]
for f in all_files:
process_match(f)
# === COMPUTE TRUE AVERAGE & TRUE STRIKE RATE ===
results = []
for player, data in player_stats.items():
if data['outs'] == 0 or data['baseline_outs'] == 0 or data['balls'] < MIN_BALLS_FACED or data['baseline_balls'] == 0:
continue # skip if average or SR is undefined or not enough data
avg = data['runs'] / data['outs']
baseline_avg = data['baseline_runs'] / data['baseline_outs']
true_avg_pct = (avg / baseline_avg - 1) * 100
strike_rate = data['runs'] / data['balls'] * 100
baseline_sr = data['baseline_runs'] / data['baseline_balls'] * 100
true_sr_pct = (strike_rate / baseline_sr - 1) * 100
results.append({
'Player': player,
'Average': round(avg, 2),
'Baseline Avg': round(baseline_avg, 2),
'True Average (%)': round(true_avg_pct, 2),
'Strike Rate': round(strike_rate, 2),
'Baseline SR': round(baseline_sr, 2),
'True SR (%)': round(true_sr_pct, 2),
'Runs': data['runs'],
'Outs': data['outs'],
'Balls': data['balls'],
})
# === OUTPUT ===
df = pd.DataFrame(results)
df.sort_values("True Average (%)", ascending=False, inplace=True)
display(df) # for Jupyter notebooks
# === COMBINED TRUE AVG vs TRUE SR PLOT ===
plt.figure(figsize=(8,6))
plt.scatter(df['True SR (%)'], df['True Average (%)'])
for _, row in df.iterrows():
plt.annotate(row['Player'], (row['True SR (%)'], row['True Average (%)']), fontsize=8)
plt.axhline(0, color='gray', linestyle='--')
plt.axvline(0, color='gray', linestyle='--')
plt.xlabel("True Strike Rate (%)")
plt.ylabel("True Average (%)")
plt.title("True Average vs True Strike Rate: IPL Batters")
plt.grid(True)
plt.show()
| Player | Average | Baseline Avg | True Average (%) | Strike Rate | Baseline SR | True SR (%) | Runs | Outs | Balls | |
|---|---|---|---|---|---|---|---|---|---|---|
| 22 | Shashank Singh | 59.00 | 27.75 | 112.61 | 157.33 | 139.70 | 12.62 | 354 | 6 | 225 |
| 14 | T Stubbs | 63.00 | 31.99 | 96.93 | 185.29 | 157.49 | 17.66 | 378 | 6 | 204 |
| 44 | N Pooran | 62.38 | 34.21 | 82.34 | 168.58 | 142.59 | 18.23 | 499 | 8 | 296 |
| 0 | V Kohli | 61.75 | 35.53 | 73.81 | 149.09 | 154.09 | -3.24 | 741 | 12 | 497 |
| 49 | B Sai Sudharsan | 52.70 | 31.19 | 68.94 | 135.82 | 140.06 | -3.02 | 527 | 10 | 388 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27 | RK Singh | 18.67 | 31.33 | -40.42 | 143.59 | 151.58 | -5.27 | 168 | 9 | 117 |
| 56 | HH Pandya | 18.00 | 31.89 | -43.56 | 140.26 | 147.20 | -4.72 | 216 | 12 | 154 |
| 7 | AM Rahane | 20.17 | 35.84 | -43.73 | 121.00 | 141.44 | -14.45 | 242 | 12 | 200 |
| 43 | DJ Hooda | 18.12 | 33.42 | -45.77 | 135.51 | 142.12 | -4.65 | 145 | 8 | 107 |
| 47 | WP Saha | 15.11 | 29.30 | -48.43 | 113.33 | 139.11 | -18.53 | 136 | 9 | 120 |
67 rows × 10 columns
In [3]:
df.to_csv('ipl_player_stats.csv', index=False)
Batting Impact¶
In [4]:
# Normalize True Average and True SR using min-max scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[['Norm True Avg', 'Norm True SR']] = scaler.fit_transform(df[['True Average (%)', 'True SR (%)']])
In [5]:
# Equal weight for both components
df['Total Batting Impact'] = (df['Norm True Avg'] + df['Norm True SR']) / 2
df.sort_values('Total Batting Impact', ascending=False, inplace=True)
display(df[['Player', 'True Average (%)', 'True SR (%)', 'Total Batting Impact']])
| Player | True Average (%) | True SR (%) | Total Batting Impact | |
|---|---|---|---|---|
| 22 | Shashank Singh | 112.61 | 12.62 | 0.803878 |
| 14 | T Stubbs | 96.93 | 17.66 | 0.796432 |
| 44 | N Pooran | 82.34 | 18.23 | 0.755796 |
| 64 | J Fraser-McGurk | 32.86 | 36.59 | 0.752391 |
| 25 | VR Iyer | 67.36 | 3.98 | 0.592693 |
| ... | ... | ... | ... | ... |
| 40 | Q de Kock | -37.91 | -10.82 | 0.144756 |
| 11 | DA Warner | -29.07 | -15.28 | 0.135711 |
| 7 | AM Rahane | -43.73 | -14.45 | 0.096985 |
| 31 | AK Markram | -24.36 | -24.52 | 0.074733 |
| 47 | WP Saha | -48.43 | -18.53 | 0.049010 |
67 rows × 4 columns
In [6]:
# Optional: bar chart of top 15 batters by impact
top_n = 15
plt.figure(figsize=(10,6))
top_df = df.head(top_n)
plt.barh(top_df['Player'], top_df['Total Batting Impact'], color='skyblue')
plt.xlabel('Total Batting Impact Score')
for index, value in enumerate(top_df['Total Batting Impact']):
plt.text(value, index, f'{value:.2f}', va='center')
plt.title(f'Top {top_n} IPL Batters by Combined Impact')
plt.gca().invert_yaxis()
plt.show()
Bowling Impact¶
True Wickets¶
In [7]:
# === TRUE WICKETS FOR BOWLERS ===
from collections import defaultdict
bowler_wickets = defaultdict(float)
def get_over_phase(over_num):
if over_num < 6:
return 2.0 # Powerplay
elif over_num < 15:
return 1.0 # Middle
else:
return 0.5 # Death
for f in all_files:
with open(f, "r", encoding="utf-8") as match_file:
match = json.load(match_file)
for innings in match.get("innings", []):
for over in innings.get("overs", []):
over_num = over.get("over", 0)
for delivery in over.get("deliveries", []):
bowler = delivery.get("bowler")
if "wickets" in delivery:
for w in delivery["wickets"]:
kind = w.get("kind")
if kind and kind != "run out":
phase_weight = get_over_phase(over_num)
bowler_wickets[bowler] += phase_weight
# Create a DataFrame of true wickets
bowler_df = pd.DataFrame([
{"Bowler": b, "True Wickets": round(w, 2)} for b, w in bowler_wickets.items()
]).sort_values("True Wickets", ascending=False)
display(bowler_df.head(20))
| Bowler | True Wickets | |
|---|---|---|
| 94 | R Ashwin | 214.0 |
| 7 | B Kumar | 203.0 |
| 16 | PP Chawla | 193.5 |
| 2 | YS Chahal | 192.5 |
| 46 | UT Yadav | 176.0 |
| 39 | SP Narine | 171.5 |
| 19 | Sandeep Sharma | 170.0 |
| 44 | Harbhajan Singh | 167.0 |
| 18 | TA Boult | 160.5 |
| 36 | JJ Bumrah | 159.5 |
| 43 | A Mishra | 159.0 |
| 37 | SL Malinga | 153.0 |
| 50 | RA Jadeja | 151.5 |
| 4 | Rashid Khan | 144.5 |
| 82 | Mohammed Shami | 144.0 |
| 120 | I Sharma | 139.0 |
| 29 | Z Khan | 137.5 |
| 55 | MM Sharma | 136.5 |
| 103 | DJ Bravo | 135.5 |
| 42 | DL Chahar | 131.0 |
True Economy¶
In [8]:
# === TRUE ECONOMY RATE BY PHASE ===
from collections import defaultdict
# Store bowler stats per phase across matches
bowler_phase_stats = defaultdict(lambda: {
"powerplay_runs": 0, "powerplay_balls": 0,
"middle_runs": 0, "middle_balls": 0,
"death_runs": 0, "death_balls": 0,
"true_economy_contributions": []
})
def get_phase(over_num):
if over_num < 6:
return "powerplay"
elif over_num < 15:
return "middle"
else:
return "death"
for f in all_files:
with open(f, "r", encoding="utf-8") as match_file:
match = json.load(match_file)
# Temp structure to hold all bowlers’ phase stats for this match
match_phase_runs = {
"powerplay": defaultdict(lambda: {"runs": 0, "balls": 0}),
"middle": defaultdict(lambda: {"runs": 0, "balls": 0}),
"death": defaultdict(lambda: {"runs": 0, "balls": 0}),
}
for innings in match.get("innings", []):
for over in innings.get("overs", []):
over_num = over.get("over", 0)
phase = get_phase(over_num)
for delivery in over.get("deliveries", []):
bowler = delivery.get("bowler")
runs = delivery.get("runs", {}).get("total", 0)
match_phase_runs[phase][bowler]["runs"] += runs
match_phase_runs[phase][bowler]["balls"] += 1
# After processing match, compute baseline and true economy for each bowler per phase
for phase in ["powerplay", "middle", "death"]:
all_runs = sum(b["runs"] for b in match_phase_runs[phase].values())
all_balls = sum(b["balls"] for b in match_phase_runs[phase].values())
if all_balls == 0:
continue
baseline_er = all_runs / (all_balls / 6) # runs per over
for bowler, stats in match_phase_runs[phase].items():
if stats["balls"] == 0:
continue
er = stats["runs"] / (stats["balls"] / 6)
if baseline_er == 0:
continue # skip this comparison to avoid division by zero
true_er_pct = ((er / baseline_er) - 1) * 100
# Update global stats
bowler_phase_stats[bowler][f"{phase}_runs"] += stats["runs"]
bowler_phase_stats[bowler][f"{phase}_balls"] += stats["balls"]
bowler_phase_stats[bowler]["true_economy_contributions"].append(true_er_pct)
# Final output: average true economy contribution per bowler
bowler_true_economy = []
for bowler, data in bowler_phase_stats.items():
all_contributions = data["true_economy_contributions"]
if not all_contributions:
continue
avg_true_economy = sum(all_contributions) / len(all_contributions)
bowler_true_economy.append({
"Bowler": bowler,
"True Economy (%)": round(avg_true_economy, 2),
"Matches": len(all_contributions)
})
bowler_er_df = pd.DataFrame(bowler_true_economy).sort_values("True Economy (%)")
display(bowler_er_df.head(20))
| Bowler | True Economy (%) | Matches | |
|---|---|---|---|
| 518 | AC Gilchrist | -100.00 | 1 |
| 535 | Sachin Baby | -60.48 | 2 |
| 292 | R Ravindra | -44.87 | 2 |
| 93 | NB Singh | -41.20 | 2 |
| 306 | N Wadhera | -32.74 | 1 |
| 237 | SS Iyer | -29.26 | 1 |
| 409 | SM Harwood | -23.48 | 8 |
| 291 | MP Yadav | -22.60 | 7 |
| 243 | Ramandeep Singh | -21.60 | 5 |
| 229 | Shashank Singh | -21.51 | 3 |
| 183 | DA Warner | -20.71 | 1 |
| 277 | JE Root | -19.41 | 1 |
| 311 | DS Rathi | -19.04 | 6 |
| 233 | A Badoni | -18.06 | 5 |
| 420 | AM Rahane | -17.41 | 1 |
| 28 | SP Narine | -15.59 | 431 |
| 330 | GD McGrath | -15.36 | 28 |
| 210 | RV Patel | -15.33 | 2 |
| 245 | M Pathirana | -15.31 | 40 |
| 21 | JJ Bumrah | -15.09 | 354 |
True Runs Conceded¶
In [9]:
# === TRUE RUNS CONCEDED (refined) ===
true_runs_conceded = []
for bowler, data in bowler_phase_stats.items():
total_balls = (
data['powerplay_balls'] +
data['middle_balls'] +
data['death_balls']
)
if total_balls == 0:
continue
total_runs = (
data['powerplay_runs'] +
data['middle_runs'] +
data['death_runs']
)
overs = total_balls / 6
raw_er = total_runs / overs if overs else 0
contributions = data['true_economy_contributions']
avg_true_er_pct = sum(contributions) / len(contributions) if contributions else 0
# Apply true economy adjustment
true_er = raw_er * (1 + avg_true_er_pct / 100)
true_runs = true_er * overs # final adjusted runs conceded
true_runs_conceded.append({
"Bowler": bowler,
"Balls": total_balls,
"Raw ER": round(raw_er, 2),
"True ER (%)": round(avg_true_er_pct, 2),
"True ER": round(true_er, 2),
"True Runs Conceded": round(true_runs, 2)
})
true_runs_df = pd.DataFrame(true_runs_conceded).sort_values("True Runs Conceded", ascending=False)
display(true_runs_df.tail(20))
| Bowler | Balls | Raw ER | True ER (%) | True ER | True Runs Conceded | |
|---|---|---|---|---|---|---|
| 297 | M Shahrukh Khan | 12 | 8.00 | -13.09 | 6.95 | 13.91 |
| 277 | JE Root | 12 | 7.50 | -19.41 | 6.04 | 12.09 |
| 225 | D Brevis | 3 | 16.00 | 41.33 | 22.61 | 11.31 |
| 307 | Atharva Taide | 1 | 24.00 | 172.73 | 65.45 | 10.91 |
| 157 | SN Khan | 2 | 18.00 | 78.79 | 32.18 | 10.73 |
| 93 | NB Singh | 25 | 4.32 | -41.20 | 2.54 | 10.58 |
| 505 | SPD Smith | 2 | 15.00 | 103.12 | 30.47 | 10.16 |
| 520 | SA Yadav | 6 | 8.00 | 20.16 | 9.61 | 9.61 |
| 313 | Ishan Kishan | 1 | 24.00 | 131.25 | 55.50 | 9.25 |
| 434 | RS Gavaskar | 6 | 8.00 | 13.13 | 9.05 | 9.05 |
| 306 | N Wadhera | 12 | 6.50 | -32.74 | 4.37 | 8.74 |
| 482 | Y Gnaneswara Rao | 6 | 7.00 | 14.90 | 8.04 | 8.04 |
| 416 | LA Carseldine | 7 | 5.14 | 7.14 | 5.51 | 6.43 |
| 484 | SS Mundhe | 7 | 5.14 | -3.90 | 4.94 | 5.77 |
| 237 | SS Iyer | 6 | 7.00 | -29.26 | 4.95 | 4.95 |
| 420 | AM Rahane | 6 | 5.00 | -17.41 | 4.13 | 4.13 |
| 292 | R Ravindra | 12 | 3.50 | -44.87 | 1.93 | 3.86 |
| 535 | Sachin Baby | 10 | 4.80 | -60.48 | 1.90 | 3.16 |
| 183 | DA Warner | 2 | 6.00 | -20.71 | 4.76 | 1.59 |
| 518 | AC Gilchrist | 1 | 0.00 | -100.00 | 0.00 | 0.00 |
In [10]:
# === TRUE BOWLING AVERAGE ===
# Set minimum threshold for career deliveries bowled
MIN_BALLS_BOWLED = 300
# Get total balls per bowler from phase stats
bowler_total_balls = {
b: data['powerplay_balls'] + data['middle_balls'] + data['death_balls']
for b, data in bowler_phase_stats.items()
}
# Filter true_runs_df and bowler_df to only include bowlers with enough balls
qualified_bowlers = {b for b, balls in bowler_total_balls.items() if balls >= MIN_BALLS_BOWLED}
true_runs_df = true_runs_df[true_runs_df['Bowler'].isin(qualified_bowlers)]
bowler_df = bowler_df[bowler_df['Bowler'].isin(qualified_bowlers)]
# Merge true runs and true wickets
true_avg_df = pd.merge(true_runs_df, bowler_df, on="Bowler", how="inner")
# Filter out bowlers with 0 true wickets to avoid division by zero
true_avg_df = true_avg_df[true_avg_df["True Wickets"] > 0].copy()
# Calculate true average
true_avg_df["True Bowling Average"] = true_avg_df["True Runs Conceded"] / true_avg_df["True Wickets"]
true_avg_df["True Bowling Average"] = true_avg_df["True Bowling Average"].round(2)
# Sort by most efficient average (lowest)
true_avg_df.sort_values("True Bowling Average", inplace=True)
display(true_avg_df[["Bowler", "True Runs Conceded", "True Wickets", "True Bowling Average"]].head(20))
| Bowler | True Runs Conceded | True Wickets | True Bowling Average | |
|---|---|---|---|---|
| 195 | AD Mascarenhas | 388.11 | 30.0 | 12.94 |
| 194 | L Ngidi | 399.12 | 25.5 | 15.65 |
| 183 | MF Maharoof | 496.91 | 30.0 | 16.56 |
| 196 | GD McGrath | 309.77 | 18.5 | 16.74 |
| 179 | BW Hilfenhaus | 526.93 | 30.5 | 17.28 |
| 42 | DL Chahar | 2307.11 | 131.0 | 17.61 |
| 148 | DE Bollinger | 684.42 | 37.5 | 18.25 |
| 131 | JR Hazlewood | 822.16 | 44.0 | 18.69 |
| 104 | S Aravind | 1081.56 | 57.5 | 18.81 |
| 95 | MA Starc | 1157.81 | 61.5 | 18.83 |
| 186 | KW Richardson | 491.79 | 25.5 | 19.29 |
| 66 | MM Patel | 1796.37 | 91.5 | 19.63 |
| 26 | Z Khan | 2781.99 | 137.5 | 20.23 |
| 22 | SL Malinga | 3120.49 | 153.0 | 20.40 |
| 140 | DP Nannes | 745.31 | 36.5 | 20.42 |
| 19 | JJ Bumrah | 3260.65 | 159.5 | 20.44 |
| 153 | Mohsin Khan | 673.36 | 32.5 | 20.72 |
| 17 | TA Boult | 3349.53 | 160.5 | 20.87 |
| 86 | S Sreesanth | 1356.29 | 65.0 | 20.87 |
| 155 | Harshit Rana | 653.36 | 31.0 | 21.08 |
In [11]:
# Filter top 50 by lowest True Average and top 50 by lowest True ER
top_avg_bowlers = set(true_avg_df.nsmallest(50, 'True Bowling Average')["Bowler"])
top_er_bowlers = set(true_avg_df.nsmallest(50, 'True ER')["Bowler"])
# Intersection of both sets
top_bowlers = top_avg_bowlers & top_er_bowlers
# Filter the DataFrame
top_df = true_avg_df[true_avg_df["Bowler"].isin(top_bowlers)].copy()
plt.figure(figsize=(8,6))
plt.scatter(top_df['True ER'], top_df['True Bowling Average'])
for _, row in top_df.iterrows():
plt.annotate(row['Bowler'], (row['True ER'], row['True Bowling Average']), fontsize=8)
plt.xlabel("True Economy Rate (runs per over)")
plt.ylabel("True Bowling Average (runs per wicket)")
plt.title("Top 50 IPL Bowlers: True Avg vs True Economy")
plt.axhline(top_df['True Bowling Average'].mean(), color='gray', linestyle='--', label='Avg True Avg')
plt.axvline(top_df['True ER'].mean(), color='gray', linestyle='--', label='Avg True ER')
plt.grid(True)
plt.legend()
plt.show()
Bowling impact¶
In [12]:
from sklearn.preprocessing import MinMaxScaler
# We inverse the metrics so that lower values give higher normalized impact
top_df = top_df.copy()
scaler = MinMaxScaler()
top_df[['Norm Avg', 'Norm ER']] = scaler.fit_transform(
top_df[['True Bowling Average', 'True ER']].apply(lambda x: -x)
)
# Combine equally for Total Bowling Impact
top_df['Total Bowling Impact'] = (top_df['Norm Avg'] + top_df['Norm ER']) / 2
top_df.sort_values('Total Bowling Impact', ascending=False, inplace=True)
display(top_df[['Bowler', 'True Bowling Average', 'True ER', 'Total Bowling Impact']].head(20))
| Bowler | True Bowling Average | True ER | Total Bowling Impact | |
|---|---|---|---|---|
| 196 | GD McGrath | 16.74 | 5.65 | 0.821092 |
| 183 | MF Maharoof | 16.56 | 6.76 | 0.550672 |
| 194 | L Ngidi | 15.65 | 7.00 | 0.533215 |
| 195 | AD Mascarenhas | 12.94 | 7.51 | 0.532663 |
| 19 | JJ Bumrah | 20.44 | 6.14 | 0.523777 |
| 9 | SP Narine | 23.13 | 5.71 | 0.505169 |
| 43 | DW Steyn | 21.56 | 6.01 | 0.503710 |
| 120 | A Kumble | 23.51 | 5.67 | 0.497329 |
| 22 | SL Malinga | 20.40 | 6.30 | 0.485459 |
| 23 | Rashid Khan | 21.17 | 6.28 | 0.454232 |
| 148 | DE Bollinger | 18.25 | 6.84 | 0.451005 |
| 140 | DP Nannes | 20.42 | 6.49 | 0.436779 |
| 2 | R Ashwin | 22.77 | 6.22 | 0.393978 |
| 180 | M Pathirana | 23.22 | 6.23 | 0.370279 |
| 96 | JC Archer | 21.24 | 6.72 | 0.340384 |
| 56 | CV Varun | 21.13 | 7.05 | 0.262648 |
| 26 | Z Khan | 20.23 | 7.33 | 0.234669 |
| 131 | JR Hazlewood | 18.69 | 7.64 | 0.229284 |
| 99 | Iqbal Abdulla | 22.04 | 7.15 | 0.194679 |
| 4 | B Kumar | 23.26 | 6.98 | 0.179953 |
In [13]:
# === BAR CHART: TOP 20 TOTAL BOWLING IMPACT ===
top_n = 20
impact_df = top_df.head(top_n).sort_values("Total Bowling Impact", ascending=True) # for horizontal bar
plt.figure(figsize=(10, 6))
plt.barh(impact_df['Bowler'], impact_df['Total Bowling Impact'], color='seagreen')
plt.xlabel("Total Bowling Impact Score")
plt.title(f"Top {top_n} IPL Bowlers by Total Bowling Impact")
plt.grid(axis='x')
plt.tight_layout()
plt.show()
Bowling impact score¶
In [14]:
# === TABLE: BOWLING IMPACT POINTS ===
top_df['Bowling Impact Points'] = top_df['Total Bowling Impact'] * top_df['Balls']
top_df.sort_values('Bowling Impact Points', ascending=False, inplace=True)
display(top_df[['Bowler', 'Balls', 'Total Bowling Impact', 'Bowling Impact Points']].head(20))
| Bowler | Balls | Total Bowling Impact | Bowling Impact Points | |
|---|---|---|---|---|
| 9 | SP Narine | 4170 | 0.505169 | 2106.556582 |
| 2 | R Ashwin | 4703 | 0.393978 | 1852.878032 |
| 19 | JJ Bumrah | 3185 | 0.523777 | 1668.229992 |
| 22 | SL Malinga | 2974 | 0.485459 | 1443.755993 |
| 23 | Rashid Khan | 2925 | 0.454232 | 1328.628843 |
| 43 | DW Steyn | 2282 | 0.503710 | 1149.465529 |
| 4 | B Kumar | 4060 | 0.179953 | 730.610964 |
| 26 | Z Khan | 2276 | 0.234669 | 534.106890 |
| 120 | A Kumble | 983 | 0.497329 | 488.874332 |
| 56 | CV Varun | 1672 | 0.262648 | 439.147432 |
| 96 | JC Archer | 1033 | 0.340384 | 351.616311 |
| 140 | DP Nannes | 689 | 0.436779 | 300.940730 |
| 148 | DE Bollinger | 600 | 0.451005 | 270.603015 |
| 196 | GD McGrath | 329 | 0.821092 | 270.139360 |
| 183 | MF Maharoof | 441 | 0.550672 | 242.846521 |
| 180 | M Pathirana | 503 | 0.370279 | 186.250282 |
| 99 | Iqbal Abdulla | 943 | 0.194679 | 183.581982 |
| 194 | L Ngidi | 342 | 0.533215 | 182.359382 |
| 195 | AD Mascarenhas | 310 | 0.532663 | 165.125628 |
| 5 | YS Chahal | 3647 | 0.045226 | 164.939698 |
In [15]:
# === BAR CHART: TOP 20 BOWLING IMPACT POINTS ===
top_n = 20
impact_points_df = top_df.head(top_n).sort_values("Bowling Impact Points", ascending=True)
plt.figure(figsize=(10, 6))
plt.barh(impact_points_df['Bowler'], impact_points_df['Bowling Impact Points'], color='darkorange')
plt.xlabel("Bowling Impact Points")
plt.title(f"Top {top_n} IPL Bowlers by Bowling Impact Points")
plt.grid(axis='x')
plt.tight_layout()
plt.show()
In [16]:
import plotly.io as pio
pio.renderers.default = 'notebook'
import plotly.express as px
scatter_df = top_df.nlargest(50, 'Total Bowling Impact').copy()
fig = px.scatter(
scatter_df,
x='True Runs Conceded',
y='Total Bowling Impact',
size='Balls',
text='Bowler',
hover_data={
'True Runs Conceded': ':.1f',
'Total Bowling Impact': ':.3f',
'Balls': True,
'Bowler': False
},
color='Total Bowling Impact',
color_continuous_scale='Viridis',
title='Top 50 IPL Bowlers: True Runs vs Total Bowling Impact',
height=600
)
fig.update_traces(textposition='top center')
fig.update_layout(xaxis_title='True Runs Conceded', yaxis_title='Total Bowling Impact')
fig.show()